# -*- coding: utf-8 -*-
"""
Created on Thu Mar 24 13:13:14 2022

@author: admin
"""
#note: need pandas version < 1.4.0

import numpy as np
import pandas as pd
import pickle
#import pyarrow
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa.stattools as smtsa
import matplotlib.ticker as mtick
import scipy.stats as stats
import scipy.optimize as optimize
import numpy_financial as npf
from scipy.optimize import fsolve

#go to the directory where this file is located
import os
os.chdir(os.path.dirname(os.path.abspath(__file__)))



data1=pd.read_excel('Q_1694m3-1860m2.xlsx')
data2=pd.read_excel('Q_1860m3-1881m2.xlsx')
data3=pd.read_excel('Q_1881m1-1947m12.xlsx')
data4=pd.read_excel('Q_1900m11-1947m12.xlsx')
data5=pd.read_excel('Q_1946m1-2018m12.xlsx')

data=pd.concat([data1,data2,data3,data4,data5], ignore_index = True)

##price data

dataP = pd.read_excel('P_1976m1-2018m12.xlsx',sheet_name='UK Gilts 1975 onwards')
dataP2 = pd.read_excel('P_1963m1-1975m12.xlsx',sheet_name='UK Gilts')
dataP=dataP.append(dataP2,ignore_index=True)

dataP2 = pd.read_excel('P_1949m1-1962m12.xlsx',sheet_name='UK Gilts')
dataP=dataP.append(dataP2,ignore_index=True)

dataP2 = pd.read_excel('P_1881m1-1949m3.xlsx',sheet_name='UK Gilts P old')
dataP=dataP.append(dataP2,ignore_index=True)

dataP2 = pd.read_excel('P_1860m3-1888m2.xlsx',sheet_name='UK Gilts P old')
dataP=dataP.append(dataP2,ignore_index=True)

dataP2 = pd.read_excel('P_1729m7-1860m2.xlsx',sheet_name='UK Gilts P old')
dataP=dataP.append(dataP2,ignore_index=True)


dataP['YM']=pd.to_datetime(dataP['Year'].astype(int).astype(str)  + dataP['Month'].astype(int).astype(str), format='%Y%m')




data=data.loc[~data['ID'].isnull()]

data['proxy']=1;

data_proxy=data[['ID','proxy']].groupby('ID').sum()

data_proxy=data_proxy.reset_index()

check=data.loc[data['ID']==100042]

data=data.merge(data_proxy,on='ID',how='left')


#check=data.loc[data['Month'].isnull()]


data=data.loc[data['Year']>=1729]
#data=data.loc[data['Year']<=1946]

data['YM']=pd.to_datetime(data['Year'].astype(int).astype(str)  + data['Month'].astype(int).astype(str), format='%Y%m')

data=data.merge(dataP[['ID','YM','P']],on=['ID','YM'])

data=data.rename(columns={'proxy_y':'Survival(months)'})

data=data.sort_values(['ID','YM'])

mask=data['ID']!=data['ID'].shift(-1)
proxy=data[['ID','YM']].loc[mask]

proxy=proxy.rename(columns={'YM':'Implied-Mature'})

data=data.merge(proxy,left_on='ID',right_on='ID')

data['time_to_maturity']=((data['Implied-Mature']-data['YM'])/np.timedelta64(1, 'M')).astype(int)+1
data['ytm']=0;
data['duration']=0;

data=data.loc[data['Month']==12]
#data['time_to_maturity']=(data['time_to_maturity']/12).round(0).astype(int)
#data=data.loc[data['time_to_maturity']>0]

################compute annual return
data=data.sort_values(['ID','Year'])

data['Return']=np.exp(np.log(data['P']).diff())+data['Coupon']/100

mask=(data['ID']!=data['ID'].shift(1))

data.loc[mask,'Return']=np.nan

##############
data['ReturnXQ']=data['Return']*data['Q'].values
data.loc[data['ReturnXQ']!=data['ReturnXQ'],'Q']=np.nan


returnindex=data[['Q','Year','ReturnXQ']].groupby('Year').sum()
returnindex['Return']=returnindex['ReturnXQ']/returnindex['Q'].values

returnindex=returnindex.drop(columns={'Q','ReturnXQ'})

returnindex.to_xlsx('UKgiltreturn.xlsx')

check=data.loc[data['Year'].isin([1784,1785])]



